/*******************************************************************************
																				
	DESCRIPTION:  	This do-file produces binned scatterplots of empirical JFR 
					on predicted JFR for the baseline model, together with
					robustness versions by subsample and by groups of observables.
	
*******************************************************************************/

* Start from a clean session before anything is loaded
clear all
* Numeric prefix that appears in the file name of every figure exported below
global id_code 113
* With pause switched off, the pause commands further down do not stop the run
pause off
* Fix the random-number seed.
* NOTE(review): no command visible in this file draws random numbers - confirm the seed is still needed
set seed 2110

/*******************************************************************************
*	1. Baseline
********************************************************************************/
* Year and model tags: they pick the input dataset and are reused in output names
local year 2006
local model Full

* Read the main dataset with ensemble predictions for this model and year
use "${data}/003_MainWithEnsemblePred_`model'_`year'.dta", clear

* DataYear records which year the individuals come from
gen DataYear = `year'


* Binned scatter of empirical against predicted JFR: one figure per combination
* of horizon (months) and time already spent in the spell (unempl)
foreach months of numlist 6 {

	foreach unempl of numlist 0 6 12 {

		* Short handles for the outcome and for its prediction
		local y emplAft`months'M_`unempl'M_In
		local yhat p_emplAft`months'M_`unempl'M_In

		* Text shared by both axis titles
		if `unempl' == 0 {
			local jfr "`months'-Month JFR at Start of Spell"
		}
		else {
			local jfr "`months'-Month JFR `unempl' Months into Spell"
		}

		* Everything below is destructive, so work on a temporary copy
		preserve

		* Keep rows whose date startU shifted by unempl*30 days falls in the sample year
		keep if year(startU + `unempl'*30) == `year'

		* OLS of the empirical on the predicted JFR; the formatted estimates
		* feed the legend of the figure
		regress `y' `yhat'
		local b0 : display %4.2f _b[_cons]
		local se0 : display %4.2f _se[_cons]
		local b1 : display %4.2f _b[`yhat']
		local se1 : display %4.2f _se[`yhat']
		local r2 : display %5.3f `e(r2)'
		local n : display %9.0fc `e(N)'

		* Twenty equal-mass bins of the predicted JFR
		xtile bin = `yhat', nq(20)

		* One row per bin holding the mean empirical and mean predicted JFR
		collapse (mean) `y' `yhat', by(bin)

		* Fitted values of the regression above, evaluated at the bin means
		predict prediction_line

		* Layers: fitted line, 45-degree reference line, bin averages
		twoway ///
			(line prediction_line `yhat', lcolor(orange_red)) ///
			(function y=x, lcolor(gs7) lpattern(dash)) ///
			(scatter `y' `yhat', mfcolor(ebblue) mlcolor(navy)), ///
			graphregion(color(white)) ///
			legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0)) ///
			ytitle("Average `jfr'") ///
			xtitle("Predicted `jfr'") ///
			ylabel(0(0.2)1, angle(0)) ///
			xscale(titlegap(2)) yscale(titlegap(2)) ///
			name(scatter_`months'M_`unempl'MIn_equal, replace)

		graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_EqualSizedBins.pdf", as(pdf) replace 

		* Back to the full individual-level data for the next iteration
		restore
	}
}


*******************************************************************************
 * Robustness: splitting into subsamples
*******************************************************************************		

* Merge with sample data. assert(2 3) makes the merge fail if any row in memory
* has no match in the using file; keep(3) then retains the matched rows only.
merge 1:1 LopNr_PersonNr InLnr using "${data}/001_9_FinalMainDataset.dta", ///
	 keepusing(Gender foreign EducLevel L_WageInc_adj L_OtherInc_adj L_FamInc_adj DaysUnemp_5Years DaysOnDI_5Years) assert(2 3) keep(3) nogen

* Total individual income (wage plus other income) and its decile groups
gen L_TotInc_adj = L_WageInc_adj + L_OtherInc_adj
xtile dec_L_TotInc_adj = L_TotInc_adj, nq(10) 

* Two-group splits (xtile with two quantiles) of each continuous characteristic,
* all sharing one value label
label define median_lab 1 "Below Median" 2 "Above Median"

foreach v in L_TotInc_adj EducLevel DaysUnemp_5Years DaysOnDI_5Years {
	xtile median_`v' = `v', nq(2)
	label values median_`v' median_lab
}

* Variable labels: the subsample figures use them as the title of the combined graph
label variable median_L_TotInc_adj "Individual Income"
label variable median_EducLevel "Education Level"
label variable median_DaysUnemp_5Years "Days on UI (5y)"
label variable median_DaysOnDI_5Years "Days on DI (5y)"


* Produce the graphs: for every split variable, one binned scatter per group,
* then both panels combined into a single exported figure.
local model Full		
local year 2006		
local months 6 
local unempl 0

foreach v in median_L_TotInc_adj median_EducLevel Gender foreign median_DaysUnemp_5Years median_DaysOnDI_5Years {
	
	* Counter used to name the two panels group1 and group2
	local i = 1
	
	* Gender and foreign are looped over the values 0 and 1,
	* the median splits over the values 1 and 2
	if "`v'" == "Gender" | "`v'" == "foreign" {
		local nu 0/1
	}
	else {
		local nu 1/2
	}

	forval x = `nu' {		
		preserve
		keep if `year'==year(startU + `unempl'*30)

		keep if `v' == `x'
		
		* Panel title = value label of the current group. It is read here because
		* the collapse below removes the split variable from the data.
		local grouptitle : label (`v') `x'
		
		* Regress empirical JFR on predicted JFR and store coefficients and R-squared
		reg emplAft`months'M_`unempl'M_In p_emplAft`months'M_`unempl'M_In
		local b0 : display %4.2f _b[_cons]
		local b1 : display %4.2f _b[p_emplAft`months'M_`unempl'M_In]
		local se0 : display %4.2f _se[_cons]
		local se1 : display %4.2f _se[p_emplAft`months'M_`unempl'M_In]
		local r2 : display %5.3f `e(r2)'
		local n : display %9.0fc `e(N)'
	
		* Now we generate bins with equal mass (i.e., quantiles):
		xtile bin = p_emplAft`months'M_`unempl'M_In, nq(20)
		
		* Average empirical and predicted JFR in each bin, one row per bin
		* (same approach as in the baseline section)
		collapse (mean) emplAft`months'M_`unempl'M_In p_emplAft`months'M_`unempl'M_In, by(bin)
		
		* Predict the JFR based on the coefficients from the regression and bins
		predict prediction_line 
		
		* Plot the average empirical JFR in each bin of predicted JFR
		twoway ///
			(line prediction_line p_emplAft`months'M_`unempl'M_In, lcolor(orange_red)) (function y=x, lcolor(gs7) lpattern(dash)) ///
			(scatter emplAft`months'M_`unempl'M_In p_emplAft`months'M_`unempl'M_In, mfcolor(ebblue) mlcolor(navy)), ///
			graphregion(color(white)) legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0))			///
			title("`grouptitle'")			///
			ytitle("Average `months'-Month JFR `unempl' Months into Spell") ///
			xtitle("Predicted `months'-Month JFR `unempl' Months into Spell (`year' Sample, Full Model)") ///
			ylabel(0(0.2)1, angle(0)) ///
			xscale(titlegap(2)) yscale(titlegap(2)) ///
			name(group`i++', replace)
	
		* No export here: the single panels used to be written to the same path
		* as the combined figure below, which then overwrote them.
		restore
	}
	
	* Combine graphs:
	graph combine group1 group2, ycommon title("`: variable label `v''") ///
	 ysize(4.5) xsize(10) graphregion(color(white)) ///
	name(`v', replace)
	
	graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_EqualSizedBins_Subsample_`v'.pdf", as(pdf) replace 
		
}

*******************************************************************************
 * Robustness: by 160 groups by observables
*******************************************************************************		
pause off

* Settings for this section (vars is not referenced in this section)
local vars _
local year 2006
local model Full
local months 6

* Characteristics whose cross-classification defines the groups
local cells dec_L_TotInc_adj Gender median_DaysUnemp_5Years median_DaysOnDI_5Years foreign

foreach unempl of numlist 0 6 12 {

	* Short handles for the outcome and for its prediction
	local y emplAft`months'M_`unempl'M_In
	local yhat p_emplAft`months'M_`unempl'M_In

	* Work on a copy of the default frame so the individual-level data survive
	frame copy default bins_`unempl'M, replace
	frame change bins_`unempl'M

	* Complete cases whose date startU shifted by unempl*30 days lies in the sample year
	keep if !missing(`y', `yhat') & year(startU + `unempl'*30) == `year'

	* One row per group: number of observations (stored in InLnr) and the mean
	* empirical and mean predicted JFR
	collapse (count) InLnr (mean) `y' `yhat', by(`cells')

	* Group-level regression of mean empirical on mean predicted JFR, weighted by
	* group size; the formatted estimates feed the legend
	regress `y' `yhat' [aweight = InLnr]
	local b0 : display %4.2f _b[_cons]
	local se0 : display %4.2f _se[_cons]
	local b1 : display %4.2f _b[`yhat']
	local se1 : display %4.2f _se[`yhat']
	local r2 : display %5.3f `e(r2)'
	local n : display %9.0fc `e(N)'

	* Fitted values of that regression for every group
	predict prediction_line

	* Layers: fitted line, 45-degree reference line, group averages
	twoway ///
		(line prediction_line `yhat', lcolor(orange_red)) ///
		(function y=x, lcolor(gs7) lpattern(dash)) ///
		(scatter `y' `yhat', mfcolor(ebblue) mlcolor(navy)), ///
		graphregion(color(white)) ///
		legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0)) ///
		ytitle("Average `months'-Month JFR `unempl' Months into Spell") ///
		xtitle("Predicted `months'-Month JFR `unempl' Months into Spell (`year' Sample, Full Model)") ///
		ylabel(0(0.2)1, angle(0)) ///
		xscale(titlegap(2)) yscale(titlegap(2)) ///
		name(scatter_`months'M_`unempl'MIn_160groups, replace)

	* NOTE(review): the file name says 144Groups while the graph name and the
	* section title say 160 - confirm which is intended before renaming anything
	graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_144Groups.pdf", as(pdf) replace 

	* Does not stop the run while pause is off (see the top of this section)
	pause

	* Return to the individual-level data for the next iteration
	frame change default
}

*******************************************************************************
 * Robustness: by 40 income decile x gender x days on UI groups
*******************************************************************************		
pause off

* Settings for this section (vars is not referenced in this section)
local vars _
local year 2006
local model Full
local months 6

* Characteristics whose cross-classification defines the groups
local cells dec_L_TotInc_adj Gender median_DaysUnemp_5Years

foreach unempl of numlist 0 6 12 {

	* Short handles for the outcome and for its prediction
	local y emplAft`months'M_`unempl'M_In
	local yhat p_emplAft`months'M_`unempl'M_In

	* Work on a copy of the default frame so the individual-level data survive
	frame copy default bins_`unempl'M, replace
	frame change bins_`unempl'M

	* Complete cases whose date startU shifted by unempl*30 days lies in the sample year
	keep if !missing(`y', `yhat') & year(startU + `unempl'*30) == `year'

	* One row per group: number of observations (stored in InLnr) and the mean
	* empirical and mean predicted JFR
	collapse (count) InLnr (mean) `y' `yhat', by(`cells')

	* Group-level regression of mean empirical on mean predicted JFR, weighted by
	* group size; the formatted estimates feed the legend
	regress `y' `yhat' [aweight = InLnr]
	local b0 : display %4.2f _b[_cons]
	local se0 : display %4.2f _se[_cons]
	local b1 : display %4.2f _b[`yhat']
	local se1 : display %4.2f _se[`yhat']
	local r2 : display %5.3f `e(r2)'
	local n : display %9.0fc `e(N)'

	* Fitted values of that regression for every group
	predict prediction_line

	* Layers: fitted line, 45-degree reference line, group averages
	twoway ///
		(line prediction_line `yhat', lcolor(orange_red)) ///
		(function y=x, lcolor(gs7) lpattern(dash)) ///
		(scatter `y' `yhat', mfcolor(ebblue) mlcolor(navy)), ///
		graphregion(color(white)) ///
		legend(order(1 "{bf:Regression Line}" - "Intercept = `b0' (`se0')" - "Slope = `b1' (`se1')" /* - "R{superscript:2} = `r2'" */ - "N = `n'") symxsize(*0.5) size(small) cols(1) pos(4) ring(0)) ///
		ytitle("Average `months'-Month JFR `unempl' Months into Spell") ///
		xtitle("Predicted `months'-Month JFR `unempl' Months into Spell (`year' Sample, Full Model)") ///
		ylabel(0(0.2)1, angle(0)) ///
		xscale(titlegap(2)) yscale(titlegap(2)) ///
		name(scatter_`months'M_`unempl'MIn_40groups, replace)

	* NOTE(review): the file name says 36Groups while the graph name and the
	* section title say 40 - confirm which is intended before renaming anything
	graph export "${output}/${id_code}_Predicted`months'MJFR_onEmpirical_`unempl'M_`model'_36Groups.pdf", as(pdf) replace 

	* Does not stop the run while pause is off (see the top of this section)
	pause

	* Return to the individual-level data for the next iteration
	frame change default
}
